#delimit;
set more off;

/* Close any log that is still open (for example from an earlier run of this do file */
/* that stopped before reaching its log close), so that the log using command below  */
/* does not stop with a "log file already open" error.  capture swallows the error   */
/* that log close raises when no log is open.                                        */
capture log close;

/* Write the log as plain text */
set logtype text;

log using /home/dnc2101/Accidental_Deaths/Log_Files/make_control_data_log.log, replace;


/************************************************************************************
*
* This do file takes the raw Census data on the number of persons in different 
* demographic groups in each state and year, cleans it, and generates the state-year
* level demographic variables that are used in making the state-year demographic 
* control variables included in the regressions for the JSL and accidental deaths
* paper
* 
* Program by Dan Carvell, written between Summer 2007 and Spring 2010.  
*
************************************************************************************/






/* The raw data from the Census comes in 3 separate sets of years - 1981 through 1989, 1990 through 1999, and 2000 through 2006    */
/* The format of the Census data changes between these three sets of years                                                         */
/* Therefore, this do file has to manipulate each set of years separately, and then append the 3 sets of years of data together    */

/* The raw Census data for the years 1981-1989 for this do file come from http://www.census.gov/popest/archives/1980s/80s_st_detail.html           */
/* The raw Census data for the years 1990-1999 for this do file come from http://www.census.gov/popest/archives/1990s/nat_sex_race_hispanic.html   */
/* The raw Census data for the years 2000-2006 for this do file come from http://www.census.gov/popest/states/asrh/SC-EST2006-04.html              */









** First I'm generating the variables I need in the 1981 through 1989 data;

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_81_89_raw.dta", clear;

/* Black population by state and year, 1981-1989.                                      */
/* Only rows with race code 2 or 6 are kept.  The age-group counts of each remaining   */
/* row are added into one row total, and the row totals are summed by state and year.  */
/* NOTE(review): race 6 is presumably the Hispanic counterpart of race 2 - confirm     */
/* against the Census documentation for the 1980s file.                                */

keep if inlist(race, 2, 6);

local age_groups
    age_0_4   age_5_9   age_10_14 age_15_19 age_20_24 age_25_29
    age_30_34 age_35_39 age_40_44 age_45_49 age_50_54 age_55_59
    age_60_64 age_65_69 age_70_74 age_75_79 age_80_84 age_85_or_above;

gen total_blacks =
      age_0_4   + age_5_9   + age_10_14 + age_15_19 + age_20_24 + age_25_29
    + age_30_34 + age_35_39 + age_40_44 + age_45_49 + age_50_54 + age_55_59
    + age_60_64 + age_65_69 + age_70_74 + age_75_79 + age_80_84 + age_85_or_above;

/* The individual age-group columns are no longer needed once the row total exists */
drop `age_groups';

collapse (sum) total_blacks, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_blacks_81_89.dta", replace;








/* now create population count of other minorities, which'd be Asian/ Pacific Islander (race of 4 if not Hispanic, 8 if Hispanic and also Asian/ Pacific Islander)    */
/* and Native American/Eskimo/Aleut (3 if not Hispanic, 7 if Hispanic as well as Native American)                                                                     */


use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_81_89_raw.dta", clear;

/* Other-minority population by state and year, 1981-1989: rows with race code        */
/* 3, 4, 7 or 8 are kept, each row's age-group counts are added into a row total,     */
/* and the row totals are summed by state and year.                                   */

keep if inlist(race, 3, 4, 7, 8);

local age_groups
    age_0_4   age_5_9   age_10_14 age_15_19 age_20_24 age_25_29
    age_30_34 age_35_39 age_40_44 age_45_49 age_50_54 age_55_59
    age_60_64 age_65_69 age_70_74 age_75_79 age_80_84 age_85_or_above;

gen total_other =
      age_0_4   + age_5_9   + age_10_14 + age_15_19 + age_20_24 + age_25_29
    + age_30_34 + age_35_39 + age_40_44 + age_45_49 + age_50_54 + age_55_59
    + age_60_64 + age_65_69 + age_70_74 + age_75_79 + age_80_84 + age_85_or_above;

/* The individual age-group columns are no longer needed once the row total exists */
drop `age_groups';

collapse (sum) total_other, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_other_minorities_81_89.dta", replace;









/* Population under age 5 by state and year, 1981-1989.                              */
/* The raw file already holds this count in the age_0_4 column, so it only has to be */
/* summed over all rows of each state and year.                                      */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_81_89_raw.dta", clear;

local older_age_groups
    age_5_9   age_10_14 age_15_19 age_20_24 age_25_29 age_30_34
    age_35_39 age_40_44 age_45_49 age_50_54 age_55_59 age_60_64
    age_65_69 age_70_74 age_75_79 age_80_84 age_85_or_above;

/* Every age group other than 0-4 is irrelevant here */
drop `older_age_groups';

collapse (sum) age_0_4, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_4_years_81_89.dta", replace;




/* Population aged 65 and over by state and year, 1981-1989.                          */
/* The five age groups from 65-69 upward are added into one row total, which is then  */
/* summed over all rows of each state and year.                                       */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_81_89_raw.dta", clear;

gen age_65_or_above =
      age_65_69 + age_70_74 + age_75_79 + age_80_84 + age_85_or_above;

local age_groups
    age_0_4   age_5_9   age_10_14 age_15_19 age_20_24 age_25_29
    age_30_34 age_35_39 age_40_44 age_45_49 age_50_54 age_55_59
    age_60_64 age_65_69 age_70_74 age_75_79 age_80_84 age_85_or_above;

/* The individual age-group columns are no longer needed once the row total exists */
drop `age_groups';

collapse (sum) age_65_or_above, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_or_older_81_89.dta", replace;







/* Males aged 15 to 24 by state and year, 1981-1989 */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_81_89_raw.dta", clear;

/* gender equal to 2 marks females, so every row with any other gender value is kept */
keep if gender!=2;

/* The 15-19 and 20-24 age groups together cover ages 15 to 24 */
gen males_age_15_to_24 = age_15_19 + age_20_24;

local age_groups
    age_0_4   age_5_9   age_10_14 age_15_19 age_20_24 age_25_29
    age_30_34 age_35_39 age_40_44 age_45_49 age_50_54 age_55_59
    age_60_64 age_65_69 age_70_74 age_75_79 age_80_84 age_85_or_above;

drop `age_groups';

collapse (sum) males_age_15_to_24, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_males_15_to_24_years_81_89.dta", replace;



















/* Now, I generate the variables I need in the Census data that covers the years 1990 through 1999      */



/* Black population by state and year, 1990-1999.                                     */
/* In this file the race / sex / Hispanic-origin cells are separate columns, so the   */
/* four black_ columns are added into a row total and then summed by state and year.  */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_90_99_raw.dta", clear;

local nonblack_cells
    white_male_nh   white_female_nh   aian_male_nh   aian_female_nh   api_male_nh   api_female_nh
    white_male_hspo white_female_hspo aian_male_hspo aian_female_hspo api_male_hspo api_female_hspo;

local black_cells
    black_male_nh black_female_nh black_male_hspo black_female_hspo;

drop `nonblack_cells';

gen total_blacks =
      black_male_nh   + black_female_nh
    + black_male_hspo + black_female_hspo;

drop `black_cells';

collapse (sum) total_blacks, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_blacks_90_99.dta", replace;






/* Other-minority population (Asians / Pacific Islanders and Native Americans) by     */
/* state and year, 1990-1999: the eight aian_ and api_ columns are added into a row   */
/* total and then summed by state and year.                                           */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_90_99_raw.dta", clear;

local white_black_cells
    white_male_nh   white_female_nh   black_male_nh   black_female_nh
    white_male_hspo white_female_hspo black_male_hspo black_female_hspo;

local other_cells
    aian_male_nh   aian_female_nh   api_male_nh   api_female_nh
    aian_male_hspo aian_female_hspo api_male_hspo api_female_hspo;

drop `white_black_cells';

gen total_other =
      aian_male_nh   + aian_female_nh   + api_male_nh   + api_female_nh
    + aian_male_hspo + aian_female_hspo + api_male_hspo + api_female_hspo;

drop `other_cells';

collapse (sum) total_other, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_other_minorities_90_99.dta", replace;







/* Population under age 5 by state and year, 1990-1999.                               */
/* Rows with Age of 4 or less are kept, all sixteen race / sex / origin columns of    */
/* each row are added together, and the row totals are summed by state and year.      */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_90_99_raw.dta", clear;

keep if Age<=4;

local all_cells
    white_male_nh   white_female_nh   black_male_nh   black_female_nh
    aian_male_nh    aian_female_nh    api_male_nh     api_female_nh
    white_male_hspo white_female_hspo black_male_hspo black_female_hspo
    aian_male_hspo  aian_female_hspo  api_male_hspo   api_female_hspo;

gen age_0_4 =
      white_male_nh   + white_female_nh   + black_male_nh   + black_female_nh
    + aian_male_nh    + aian_female_nh    + api_male_nh     + api_female_nh
    + white_male_hspo + white_female_hspo + black_male_hspo + black_female_hspo
    + aian_male_hspo  + aian_female_hspo  + api_male_hspo   + api_female_hspo;

drop `all_cells';

collapse (sum) age_0_4, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_4_years_90_99.dta", replace;






/* Population aged 65 and over by state and year, 1990-1999.                          */
/* Rows with Age of 65 or more are kept, all sixteen race / sex / origin columns of   */
/* each row are added together, and the row totals are summed by state and year.      */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_90_99_raw.dta", clear;

keep if Age>=65;

local all_cells
    white_male_nh   white_female_nh   black_male_nh   black_female_nh
    aian_male_nh    aian_female_nh    api_male_nh     api_female_nh
    white_male_hspo white_female_hspo black_male_hspo black_female_hspo
    aian_male_hspo  aian_female_hspo  api_male_hspo   api_female_hspo;

gen age_65_or_above =
      white_male_nh   + white_female_nh   + black_male_nh   + black_female_nh
    + aian_male_nh    + aian_female_nh    + api_male_nh     + api_female_nh
    + white_male_hspo + white_female_hspo + black_male_hspo + black_female_hspo
    + aian_male_hspo  + aian_female_hspo  + api_male_hspo   + api_female_hspo;

drop `all_cells';

collapse (sum) age_65_or_above, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_or_older_90_99.dta", replace;







/* Males aged 15 to 24 by state and year, 1990-1999.                                  */
/* Rows with Age from 15 through 24 are kept, the eight _male_ columns of each row    */
/* are added together, and the row totals are summed by state and year.               */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_90_99_raw.dta", clear;

keep if inrange(Age, 15, 24);

local all_cells
    white_male_nh   white_female_nh   black_male_nh   black_female_nh
    aian_male_nh    aian_female_nh    api_male_nh     api_female_nh
    white_male_hspo white_female_hspo black_male_hspo black_female_hspo
    aian_male_hspo  aian_female_hspo  api_male_hspo   api_female_hspo;

gen males_age_15_to_24 =
      white_male_nh   + black_male_nh   + aian_male_nh   + api_male_nh
    + white_male_hspo + black_male_hspo + aian_male_hspo + api_male_hspo;

drop `all_cells';

collapse (sum) males_age_15_to_24, by(statefip year);

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_males_15_to_24_years_90_99.dta", replace;
















/* Now I generate these variables in the data covering the years 2000 to 2006                             */
/* The data for 2000-2006 comes in two separate files - one with data from Alabama through Missouri,      */
/* the other with data from Montana through Wyoming.  These datasets need to be reshaped from wide into   */
/* long format, and then appended together                                                                */



/* Both raw 2000-2006 files (AL_MO = Alabama through Missouri, MT_WY = Montana through */
/* Wyoming) get exactly the same treatment, so they are handled in one loop.           */

foreach part in AL_MO MT_WY {;

    use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_06_`part'_raw.dta", clear;

    /* Only the Popestimate columns (population on July 1st of each year) are wanted, */
    /* so the two columns giving the population on April 1st are removed              */
    drop Census2000Pop Estimatesbase2000;

    /* reshape needs a variable that identifies each wide row, so number the rows */
    gen id=_n;
    order id;

    /* One row per original row and year, the year being taken from the Popestimate suffix */
    reshape long Popestimate, i(id) j(year);

    order id state year Popestimate;

    /* Data from 2006 is not used */
    drop if year==2006;

    save "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_06_`part'_reshaped.dta", replace;

};

/* Stack the two sets of states into one file */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_06_AL_MO_reshaped.dta", clear;

append using "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_06_MT_WY_reshaped.dta";

/* Summary level, Census region and Census division are not needed */
drop sumlev region division;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", replace;






/* Black population by state and year, 2000-2005, built from the reshaped file that   */
/* covers all of the states.                                                          */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", clear;

/* Blacks are Race 2 */
keep if Race==2;

/* Sum Popestimate by state and year and store the sum directly under the name used   */
/* in the files for the other periods.  Naming the result inside collapse keeps the   */
/* double storage type that collapse gives to sums - copying it with gen would create */
/* a float, which holds integers exactly only up to 16,777,216.                       */
collapse (sum) total_blacks = Popestimate, by(state year);

/* The files for the other periods call the state identifier statefip */
rename state statefip;

order statefip year total_blacks;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_blacks_00_05.dta", replace;





/* Other-minority population (Asians, Pacific Islanders and Native Americans) by      */
/* state and year, 2000-2005.                                                         */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", clear;

/* Other minorities are 3 for American Indian etc, 4 for Asian, 5 for Pacific Islander */
/* including Hawaiian.  Race of 6 is multiracial and is deliberately not counted as    */
/* Other.  Rows with Race 1, 2 or 6 are removed and every other row is kept.           */
/* NOTE(review): Race 1 is presumably White - confirm against the Census file layout.  */
keep if !inlist(Race, 1, 2, 6);

/* Sum Popestimate by state and year and store the sum directly under the name used   */
/* in the files for the other periods.  Naming the result inside collapse keeps the   */
/* double storage type that collapse gives to sums - copying it with gen would create */
/* a float, which holds integers exactly only up to 16,777,216.                       */
collapse (sum) total_other = Popestimate, by(state year);

/* The files for the other periods call the state identifier statefip */
rename state statefip;

order statefip year total_other;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_other_minorities_00_05.dta", replace;






/* Population under age 5 by state and year, 2000-2005 */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", clear;

/* Keep rows with age of 4 or less */
keep if age<=4;

/* Sum Popestimate by state and year and store the sum directly under the name used   */
/* in the files for the other periods.  Naming the result inside collapse keeps the   */
/* double storage type that collapse gives to sums - copying it with gen would create */
/* a float, which holds integers exactly only up to 16,777,216.                       */
collapse (sum) age_0_4 = Popestimate, by(state year);

/* The files for the other periods call the state identifier statefip */
rename state statefip;

order statefip year age_0_4;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_0_to_4_years_00_05.dta", replace;







/* Population aged 65 and over by state and year, 2000-2005 */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", clear;

/* Keep rows with age of 65 or more */
keep if age>=65;

/* Sum Popestimate by state and year and store the sum directly under the name used   */
/* in the files for the other periods.  Naming the result inside collapse keeps the   */
/* double storage type that collapse gives to sums - copying it with gen would create */
/* a float, which holds integers exactly only up to 16,777,216.                       */
collapse (sum) age_65_or_above = Popestimate, by(state year);

/* The files for the other periods call the state identifier statefip */
rename state statefip;

order statefip year age_65_or_above;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_age_65_or_older_00_05.dta", replace;







/* Males aged 15 to 24 by state and year, 2000-2005 */

use "/home/dnc2101/Accidental_Deaths/Raw_Data/race_age_gender_00_05_AL_WY_reshaped.dta", clear;

/* Keep rows with age from 15 through 24 */
keep if inrange(age, 15, 24);

/* Drop females (sex equal to 2).                                                     */
/* NOTE(review): this keeps every row whose sex is not 2.  If the raw file also has   */
/* "both sexes" total rows (for example sex equal to 0) they would be counted as      */
/* males here - verify that sex only takes the values 1 and 2.                        */
drop if sex==2;

/* Sum Popestimate by state and year and store the sum directly under the name used   */
/* in the files for the other periods.  Naming the result inside collapse keeps the   */
/* double storage type that collapse gives to sums - copying it with gen would create */
/* a float, which holds integers exactly only up to 16,777,216.                       */
collapse (sum) males_age_15_to_24 = Popestimate, by(state year);

/* The files for the other periods call the state identifier statefip */
rename state statefip;

order statefip year males_age_15_to_24;

save "/home/dnc2101/Accidental_Deaths/Raw_Data/pop_males_15_to_24_years_00_05.dta", replace;













/* Now, append data from the different years of Census data together - then export it for adding in to the other raw data,  */
/* and to make it easier to browse through it to check for mistakes and inconsistencies over time                           */


/* Each of the five variables gets the same treatment: start from its 1981-1989 file, */
/* append its 1990-1999 and 2000-2005 files, sort by state and year, and save the     */
/* result both as a Stata dataset and as an Excel XML file.                           */

/* Note that the years 1981-1989 are entered as 1, 2, 3, ... 9 whereas 1990 to 1999 are entered as 1990 to 1999,  */
/* but since this is exported as an Excel file and these variables are added into the raw Excel data this works  */
/* out fine                                                                                                       */

foreach stem in
    pop_blacks
    pop_other_minorities
    pop_age_0_to_4_years
    pop_age_65_or_older
    pop_males_15_to_24_years {;

    use "/home/dnc2101/Accidental_Deaths/Raw_Data/`stem'_81_89.dta", clear;

    append using "/home/dnc2101/Accidental_Deaths/Raw_Data/`stem'_90_99.dta";

    append using "/home/dnc2101/Accidental_Deaths/Raw_Data/`stem'_00_05.dta";

    sort statefip year;

    save "/home/dnc2101/Accidental_Deaths/Raw_Data/`stem'_81_05.dta", replace;

    xmlsave "/home/dnc2101/Accidental_Deaths/Raw_Data/`stem'_81_05.xml", doctype(excel) replace;

};


/* Close the log opened at the top of this do file */
log close;




